Looking at wave 03 (number of seeds is now an input).
Following measures show clear differences between model and observations for reasonable proportion of runs. Can we emulate them well enough to exploit this in history matching?
All the above are in week 12, and could also consider at earlier times, however time series plot below shows that getting right in week 12 does often naturally force runs to match earlier observations, so may not need to explicitly consider at wave 1.
We could also consider age/spatial distribution of hospital admissions rather than just deaths, however as mentioned below, this data currently seems less reliable up to March 21st.
ggplot(subset(CombinedData, week == 12), aes(x = log(cumH + 1), y = log(deaths + 1), col = ns)) +
geom_point() +
geom_hline(yintercept = c(log(250+1), log(2500+1))) +
geom_vline(xintercept = c(log(2500+1), log(10000+1))) +
scale_color_viridis() +
labs(col = 'Seeds', x = 'log(hosps)', y = 'log(deaths)')
ggplot(subset(CombinedData, week == 12), aes(x = log(cumH + 1), y = log(deaths + 1), col = R0)) +
geom_point() +
geom_hline(yintercept = c(log(250+1), log(2500+1))) +
geom_vline(xintercept = c(log(2500+1), log(10000+1))) +
scale_color_viridis() +
labs(x = 'log(hosps)', y = 'log(deaths)')
ggplot(subset(CombinedData, week == 12), aes(x = ns, y = R0, col = log(deaths + 1))) +
geom_point(size = 2) +
scale_color_viridis() +
labs(col = 'log(deaths)')
alphaI1D, alphaHD.ggplot(data = all_week12, aes(x = log(cumCD+1), y = log(cumHD+1))) + geom_point()
ggplot(data = NewDesign, aes(x = alphaI1D, y = alphaHD)) + geom_point(col = ifelse(all_week12$cumCD < 10 | all_week12$cumHD < 10, 'red', 'green'))
c(sum(subset(byDayDeaths, as.character(date) <= EndDate)$value), subset(phe_deaths, date == EndDate)$cumDeaths28DaysByDeathDate, subset(phe_deaths_cert, date == EndDate)$cumDailyNsoDeathsByDeathDate)
## [1] 616 550 569
ggplot(CombinedData, aes(x = week, y = log(deaths + 1), alpha = nroy, linetype = output, col = nroy)) +
geom_line() +
theme(legend.position = "none") +
scale_linetype_manual(values = rep(1, 224)) +
geom_point(data = obs_data, aes(y = median, col = NULL, alpha = NULL, linetype = NULL)) +
geom_point(data = obs_data, aes(y = lower, col = NULL, alpha = NULL, linetype = NULL)) +
geom_point(data = obs_data, aes(y = upper, col = NULL, alpha = NULL, linetype = NULL)) +
labs(y = 'log(deaths)')
Instead coloured by ns:
ggplot(CombinedData, aes(x = week, y = log(deaths + 1), linetype = output, col = ns)) +
geom_line(alpha = 0.5) +
scale_color_viridis() +
scale_linetype_manual(values = rep(1, 224), guide=FALSE) +
geom_point(data = obs_data, aes(y = median, col = NULL, alpha = NULL, linetype = NULL)) +
geom_point(data = obs_data, aes(y = lower, col = NULL, alpha = NULL, linetype = NULL)) +
geom_point(data = obs_data, aes(y = upper, col = NULL, alpha = NULL, linetype = NULL)) +
labs(y = 'log(deaths)')
Observations: for England, daily admissions data only publicly available from 19th March onwards (total = 2054), and all PHE cumulative totals start from this date. At trust level, have earlier data, but this is generally an undercount, with 1373 total up to 21st March.
So also likely an underestimate, consider runs within 2500-10000 as acceptable for now.
2 runs have fewer than 2.5k admissions, 204 runs have greater than 10k admissions up to 21st March.
Plotting deaths vs hospitalisations, and colouring green for runs that passed the deaths/CD/HD constraints:
ggplot(subset(CombinedData, week == 12), aes(x = log(cumH+1), y = log(deaths+1), col = nroy)) +
geom_point() +
geom_hline(yintercept = c(log(250+1), log(2500+1))) +
geom_vline(xintercept = c(log(2500+1), log(10000+1))) +
scale_color_manual(values = c('grey', 'green')) +
labs(x = 'log(hosps)', y = 'log(deaths)')
deathsByAgeTotalMW
## age deaths prop
## 1 4 3 0.006
## 2 5 11 0.020
## 3 6 35 0.065
## 4 7 63 0.116
## 5 8 426 0.786
ggplot(tmp_data, aes(x = age, y = log(deaths), linetype = output, col = nroy, alpha = nroy)) +
geom_line() +
theme(legend.position = "none") +
labs(y = 'log(Deaths)') +
scale_linetype_manual(values = rep(1, 224)) +
scale_color_manual(values = c('grey', 'green')) +
scale_alpha_manual(values = c(0.25,1)) +
geom_point(data = deathsByAgeTotalMW, aes(y = log(deaths), col = NULL, linetype = NULL, alpha = NULL))
ggplot(tmp_data, aes(x = age, y = prop, linetype = output, col = nroy, alpha = nroy)) +
geom_line() +
theme(legend.position = "none") +
labs(y = 'Proportion') +
scale_linetype_manual(values = rep(1, 224)) +
scale_color_manual(values = c('grey', 'green')) +
scale_alpha_manual(values = c(0.25,1)) +
ylim(0,1) +
geom_point(data = deathsByAgeTotalMW, aes(y = deaths / sum(deathsByAgeTotal$deaths), col = NULL, linetype = NULL, alpha = NULL))
#### Some emulators ####
mogp_dir <- '/Users/jamessalter/mogp_emulator/'
setwd('~/Documents/ExeterUQ_MOGP/')
source('BuildEmulator/BuildEmulator.R')
source('~/Documents/mogp_new_wrappers.R')
source('~/Documents/mogp_basis.R')
#new.choices <- choices.default
#new.choices$lm.maxdf <- 10 # if want to change priors, add this as list in BuildNewEmulators
# Usually scale design to [-1,1]
#### THIS ISN'T RIGHT AT THE MOMENT - NEED TO USE OVERALL MAX/MIN FOR EACH PARAMETER RATHER THAN JUST IN DESIGN ####
ScaledDesign <- NewDesign
for (i in 1:14){
ScaledDesign[,i] <- ScaledDesign[,i] - min(ScaledDesign[,i])
ScaledDesign[,i] <- ScaledDesign[,i] / max(ScaledDesign[,i] / 2)
ScaledDesign[,i] <- ScaledDesign[,i] - 1
}
# Join to MW data
CombinedDataScaled <- all_week_seeds %>% left_join(ScaledDesign, by = c('output' = 'output'))
# Create tData, containing training data for emulator
tData <- data.frame(subset(CombinedDataScaled, week == 12)[1:100,7:20],
Noise = runif(100),
LogDeaths = log(subset(CombinedDataScaled, week == 12)$deaths[1:100] + 1))
Em_LogDeaths <- BuildNewEmulators(tData = tData, HowManyEmulators = 1, meanFun="fitted")
LOO.plot(Emulators = Em_LogDeaths, which.emulator = 1,
ParamNames = colnames(tData)[Em_LogDeaths$fitting.elements$ActiveIndices[[1]]])
summary(Em_LogDeaths$fitting.elements$lm.object[[1]]$linModel)
save_ExUQmogp(Em_LogDeaths, file = 'Em_LogDeaths')